
// Start from a clean session before building the dataset
clear all


// This file uses candidate-level data to create a district-year level dataset for the lower house to merge with election data in 2_merge_election_data.do

// Inputs are Candidate-level state legislator election results (1967-2016) from Klarner Harvard Dataverse
	// Accessed here: https://dataverse.harvard.edu/file.xhtml?persistentId=doi:10.7910/DVN/3WZFK9/JOH4DS&version=3.0
	// saved as "$InputPath/196slers1967to2016_20180908.tab"


// Number of legislators and districts per state
	// Accessed here:  http://www.ncsl.org/research/about-state-legislatures/number-of-legislators-and-length-of-terms.aspx
	// stored in /Data/input/districtsperstate.csv
	
// Load the candidate-level returns and restrict to the election years of interest
import delimited "$InputPath/196slers1967to2016_20180908.tab", clear

// state abbreviations are compared in upper case throughout
replace sab = upper(sab)

// Flag observations from the state-specific election years that are retained
generate insample = 0

	// main years: every state without its own schedule below
replace insample = 1 if inlist(year, 2004, 2010, 2014) & !inlist(sab, "LA", "MS", "AL", "MD", "TX")
	// AL, MD: 4 year terms
replace insample = 1 if inlist(year, 2002, 2010, 2014) & inlist(sab, "AL", "MD")
	// LA, MS: odd election years
replace insample = 1 if inlist(year, 2003, 2007, 2011) & inlist(sab, "LA", "MS")
	// NJ, VA: odd election years
replace insample = 1 if inlist(year, 2005, 2009, 2013) & inlist(sab, "NJ", "VA")
	// TX: biennial budget
replace insample = 1 if inlist(year, 2002, 2010, 2012) & sab == "TX"

keep if insample == 1
drop insample

// Lower-house races only, plus Nebraska (kept regardless of the sen flag)
keep if sen == 0 | sab == "NE"


// General elections only; for LA keep the final round of each contest
// secondround marks second-round rows; exists_secondround counts them per state-district-year
gen secondround = inlist(etype, "sfunset", "srunoff")
egen exists_secondround = total(secondround), by(sab ddez year)

// LA contests with a second round keep only the second-round rows;
// LA contests without one keep all of their (non-second-round) rows
keep if etype == "g" | (sab == "LA" & ((exists_secondround == 0 & !secondround) | (exists_secondround > 0 & secondround)))

// candidates with only a few votes were coded with name "SCATTERING" in original files, or as 'write-in' in updated files
drop if partyz == "writein"

// Map each election year to the fiscal year in which the elected members are in office
rename year eyear
gen year = .
replace year = 2007 if inlist(eyear, 2002, 2003, 2004, 2005)	// in office for fiscal year 2006-2007
replace year = 2012 if inlist(eyear, 2007, 2009, 2010)			// in office for fiscal year 2011-2012
replace year = 2017 if inlist(eyear, 2011, 2012, 2013, 2014)	// in office for fiscal year 2016-2017

// Give the Klarner variables the names used in the rest of the project (old name above, new name below)
rename (sfips  month      ddez dtype       eseats  term       exper     out   ) ///
       (fstate electmonth sldl multimember nmember termlength incumbent winner)

label define memberlabel 1 "Single member" 2 "Multimember with posts" 3 "Multimember free for all"
label values multimember memberlabel

// Louisiana (fstate 22): the original eseats variable is coded differently due to runoffs,
// but we want the number of elected members; see Klarner's codebook.
replace nmember = 1 if fstate == 22

// Party indicators per candidate row: Democrat, Republican, anything else
gen ndemcand   = partyz == "d"
gen nrepcand   = partyz == "r"
gen nothercand = !inlist(partyz, "d", "r")

// Standardize district identifiers to the Census names
// The sldl codes matching the district names used in Klarner were identified in the Census Gazetteer (2010, 2014) for select states
// sldlname preserves the original district label before sldl is edited
gen sldlname = sldl

do "$CodePath/elections_csld_edits/sldlnames_MA.do"
do "$CodePath/elections_csld_edits/sldlnames_MD.do"
do "$CodePath/elections_csld_edits/sldlnames_MN.do"
do "$CodePath/elections_csld_edits/sldlnames_NH.do"
do "$CodePath/elections_csld_edits/sldlnames_VT.do"
do "$CodePath/elections_csld_edits/sldlnames_SD.do"


// ID and WA have two seats per district, use codes generated in sldlcodes.do
// strip the seat suffix so both seats share the district code
replace sldl = subinstr(subinstr(sldl, "-A", "", .), "-B", "", .) if sab == "ID"
replace sldl = subinstr(subinstr(sldl, " Position 1", "", .), " Position 2", "", .) if sab == "WA"

// NV: the district number is the last two characters of the name (all years except 2017);
// surrounding blanks are trimmed for every NV row
replace sldl = strtrim(substr(sldlname, -2, 2)) if sab == "NV" & year != 2017
replace sldl = strtrim(sldl) if sab == "NV" & year == 2017

// WV districts in jefferson county named 55 and 56 in census in 2004: https://www2.census.gov/plmap/pl_vtd/st54_WestVirginia/
// so 57 -> 55 and 58 -> 56 for fiscal year 2007
replace sldl = cond(sldl == "57", "55", "56") if inlist(sldl, "57", "58") & year == 2007 & sab == "WV"

// convert the cleaned codes to numeric
destring sldl, replace

// Outcome indicators per candidate row: winning incumbent, and winner by party group
gen incumbentwon = (incumbent == "inc") & (winner == "w")
gen demwon = cond(winner == "w", ndemcand, 0)
gen repwon = cond(winner == "w", nrepcand, 0)
gen indwon = cond(winner == "w", nothercand, 0)

// Park the candidate-level data in a temporary file
tempfile base
save `base', replace

// Merge with number of districts
// Loads the districts-per-state table, attaches state FIPS codes, and joins it
// to the candidate-level data saved in the tempfile `base'.
import delim "$InputPath/districtsperstate.csv", clear
// "?" matches exactly one character: renames a column whose name is one character followed by "state".
// NOTE(review): presumably a stray leading character picked up on import -- confirm against the csv header.
rename ?state state
// statastates is a user-written command; the lines below rely on it creating _merge, state_abbrev and state_fips
statastates, name(state)
keep if _merge==3 //(drop total and DC)
drop _merge state_abbrev state
rename state_fips fstate

// one row per state in memory, many candidate rows per state in `base'
merge 1:m fstate using `base' //all merge except Nebraska (one house legislature)
// _m is an abbreviation of _merge
drop _m

// address NH floterial districts (census BAF has only component districts, but some counties have an additional floterial district) 
// unique is a user-written command.
// NOTE(review): presumably stores the number of distinct flot2 values within each district-state-year in floterial_nmember -- confirm against the installed help file.
unique flot2, by(sldl fstate year) gen(floterial_nmember)

// drop the rows flagged as floterial
// NOTE(review): flot is not created in this file -- confirm whether it is its own variable or an abbreviation
drop if flot==1


//collapse to candidate-year-level observation--some candidates are listed twice if they are nominated by multiple parties (e.g.,  NY) or in multiple counties (e.g., WY)
// keeps one row per year-state-district-candidate combination
// NOTE(review): egen's tag() leaves rows with a missing value in any of these variables untagged unless the missing option is given, so such rows are dropped here -- confirm this is intended
egen tag=tag(year fstate sldl candid) 
keep if tag==1 

//collapse to district-year-level 
// district characteristics are averaged; candidate counts and win indicators are summed over candidates
collapse (mean) eyear totaldistricts* *term electmonth multimember nmember floterial_nmember termlength (sum) ndemcand nrepcand nothercand (sum) demwon repwon indwon incumbentwon, by(year fstate sldl)


// Write the district-year dataset to the output folder as statelegislators.dta
// (note: this changes the working directory to $OutputPath for the rest of the session)
cd "$OutputPath"
save statelegislators, replace





